
* Stage 0: load the base data and reduce it to one row per product_id,
* carrying the product code pc8plus and dom_share.
clear
cd "D:\data_replication"
use estimation\1_data_format\data_base.dta, clear

* Keep only years before 2006 and rows reported by declarant 4.
keep if year < 2006 & declarant == 4

* exp_share is retained only on rows where declarant equals partner;
* dom_share is the mean of those values within each product_id
* (missing when a product has no such row).
gen dom_share_temp = exp_share if declarant == partner
bysort product_id: egen dom_share = mean(dom_share_temp)
by product_id: keep if _n == 1

* no_dom_pc8plus equals 1 when dom_share is missing and 0 otherwise.
* pc starts as pc8plus and is overwritten by shorter codes in later stages.
keep pc8plus dom_share
gen no_dom_pc8plus = (dom_share == .)
gen pc = pc8plus
* Original author's note follows; what is being counted is not shown here.
// 344

* Stage pc7: derive a 7-character code and fall back to it where needed.
* ll is the character length of pc8plus; 8-character codes are truncated,
* all other codes are copied unchanged and then recoded from the list below.
gen ll = length(pc8plus)
gen pc7 = cond(ll == 8, substr(pc8plus, 1, 7), pc8plus)

* Pairs are: pc8plus value, then the pc7 code assigned to it.
local pairs ///
    7.2004  2511157 ///
    42.2007 3350111 ///
    86.2005 3511213 ///
    83.2005 2521307 ///
    13.2007 2123111 ///
    75.2005 1824231 ///
    13.2004 3512115
while "`pairs'" != "" {
    gettoken old pairs : pairs
    gettoken new pairs : pairs
    replace pc7 = "`new'" if pc8plus == "`old'"
}

* Within each pc7 group: mean of dom_share, and the number of rows that
* had no dom_share at the pc8plus level.
bysort pc7: egen dom_share_pc7 = mean(dom_share)
by pc7: egen replace_pc7 = sum(no_dom_pc8plus)

* no_dom_pc7 equals 1 when the group mean is missing and 0 otherwise.
gen no_dom_pc7 = (dom_share_pc7 == .)

* Use the pc7 code when the group contains at least one row lacking
* dom_share and the group mean itself is available.
replace pc = pc7 if dom_share_pc7 != . & replace_pc7 > 0
* Original author's note follows; what is being counted is not shown here.
// 259

* Stage pc6: derive a 6-character code and fall back to it where needed.
* 8-character codes are truncated; all other codes are copied unchanged
* and then recoded from the list below.
gen pc6 = cond(ll == 8, substr(pc8plus, 1, 6), pc8plus)

* Pairs are: pc8plus value, then the pc6 code assigned to it.
local pairs ///
    7.2004  251115 ///
    4.2004  211251 ///
    3.2007  292212 ///
    5.2004  211257 ///
    42.2007 335011 ///
    86.2005 351121 ///
    83.2005 252130 ///
    33.2007 268211 ///
    37.2007 296014 ///
    39.2007 300113 ///
    13.2007 212311 ///
    75.2005 182423 ///
    62.2005 182235 ///
    13.2004 351211 ///
    78.2005 193031
while "`pairs'" != "" {
    gettoken old pairs : pairs
    gettoken new pairs : pairs
    replace pc6 = "`new'" if pc8plus == "`old'"
}

* Within each pc6 group: mean of dom_share, and the number of rows whose
* pc7-level mean was missing.
bysort pc6: egen dom_share_pc6 = mean(dom_share)
by pc6: egen replace_pc6 = sum(no_dom_pc7)

* no_dom_pc6 equals 1 when the group mean is missing and 0 otherwise.
gen no_dom_pc6 = (dom_share_pc6 == .)

* Use the pc6 code when the group contains at least one row unresolved at
* the pc7 level and the pc6 group mean is available.
replace pc = pc6 if dom_share_pc6 != . & replace_pc6 > 0
* Original author's note follows; what is being counted is not shown here.
// 92

* Stage pc5: derive a 5-character code and fall back to it where needed.
* 8-character codes are truncated; all other codes are copied unchanged
* and then recoded from the list below.
gen pc5 = cond(ll == 8, substr(pc8plus, 1, 5), pc8plus)

* Pairs are: pc8plus value, then the pc5 code assigned to it.
local pairs ///
    7.2004  25111 ///
    4.2004  21125 ///
    3.2007  29221 ///
    5.2004  21125 ///
    7.2007  33403 ///
    42.2007 33501 ///
    86.2005 35112 ///
    83.2005 25213 ///
    33.2007 26821 ///
    37.2007 29601 ///
    39.2007 30011 ///
    45.2007 35302 ///
    13.2007 21231 ///
    75.2005 18242 ///
    62.2005 18223 ///
    13.2004 35121 ///
    64.2007 15332 ///
    78.2005 19303
while "`pairs'" != "" {
    gettoken old pairs : pairs
    gettoken new pairs : pairs
    replace pc5 = "`new'" if pc8plus == "`old'"
}

* Codes 13101 and 13201 are merged into 14111. This runs after the recodes
* above because it tests the pc5 value itself, not pc8plus.
replace pc5 = "14111" if inlist(pc5, "13101", "13201")

* Within each pc5 group: mean of dom_share, and the number of rows whose
* pc6-level mean was missing.
bysort pc5: egen dom_share_pc5 = mean(dom_share)
by pc5: egen replace_pc5 = sum(no_dom_pc6)

* no_dom_pc5 equals 1 when the group mean is missing and 0 otherwise.
gen no_dom_pc5 = (dom_share_pc5 == .)

* Use the pc5 code when the group contains at least one row unresolved at
* the pc6 level and the pc5 group mean is available.
replace pc = pc5 if dom_share_pc5 != . & replace_pc5 > 0
* Original author's note follows; what is being counted is not shown here.
// 23

* Stage pc4: derive a 4-character code and fall back to it where needed.
* 8-character codes are truncated; all other codes are copied unchanged
* and then recoded from the list below.
gen pc4 = cond(ll == 8, substr(pc8plus, 1, 4), pc8plus)

* Pairs are: pc8plus value, then the pc4 code assigned to it.
local pairs ///
    7.2004  2511 ///
    4.2004  2112 ///
    3.2007  2922 ///
    5.2004  2112 ///
    7.2007  3340 ///
    42.2007 3350 ///
    86.2005 3511 ///
    83.2005 2521 ///
    33.2007 2682 ///
    37.2007 2960 ///
    39.2007 3001 ///
    45.2007 3530 ///
    13.2007 2123 ///
    75.2005 1824 ///
    62.2005 1822 ///
    13.2004 3512 ///
    64.2007 1533 ///
    78.2005 1930
while "`pairs'" != "" {
    gettoken old pairs : pairs
    gettoken new pairs : pairs
    replace pc4 = "`new'" if pc8plus == "`old'"
}

* Rows whose pc5 code is 14111 take the matching 4-character prefix.
replace pc4 = "1411" if pc5 == "14111"

* Within each pc4 group: mean of dom_share, and the number of rows whose
* pc5-level mean was missing.
bysort pc4: egen dom_share_pc4 = mean(dom_share)
by pc4: egen replace_pc4 = sum(no_dom_pc5)

* no_dom_pc4 equals 1 when the group mean is missing and 0 otherwise.
gen no_dom_pc4 = (dom_share_pc4 == .)

* Use the pc4 code when the group contains at least one row unresolved at
* the pc5 level and the pc4 group mean is available.
replace pc = pc4 if dom_share_pc4 != . & replace_pc4 > 0
* Original author's note follows; its exact meaning is not shown here.
// 13, only synthetic

* Stage pc3: derive a 3-character code and fall back to it where needed.
* 8-character codes are truncated; all other codes are copied unchanged
* and then recoded below, grouped by the pc3 code they receive.
gen pc3 = cond(ll == 8, substr(pc8plus, 1, 3), pc8plus)

replace pc3 = "153" if pc8plus == "64.2007"
replace pc3 = "182" if inlist(pc8plus, "75.2005", "62.2005")
replace pc3 = "193" if pc8plus == "78.2005"
replace pc3 = "211" if inlist(pc8plus, "4.2004", "5.2004")
replace pc3 = "212" if pc8plus == "13.2007"
replace pc3 = "251" if pc8plus == "7.2004"
replace pc3 = "252" if pc8plus == "83.2005"
replace pc3 = "268" if pc8plus == "33.2007"
replace pc3 = "292" if pc8plus == "3.2007"
replace pc3 = "296" if pc8plus == "37.2007"
replace pc3 = "300" if pc8plus == "39.2007"
replace pc3 = "334" if pc8plus == "7.2007"
replace pc3 = "335" if pc8plus == "42.2007"
replace pc3 = "351" if inlist(pc8plus, "86.2005", "13.2004")
replace pc3 = "353" if pc8plus == "45.2007"

* Rows whose pc5 code is 14111 take the matching 3-character prefix.
replace pc3 = "141" if pc5 == "14111"

* Within each pc3 group: mean of dom_share, and the number of rows whose
* pc4-level mean was missing.
bysort pc3: egen dom_share_pc3 = mean(dom_share)
by pc3: egen replace_pc3 = sum(no_dom_pc4)

* no_dom_pc3 equals 1 when the group mean is missing and 0 otherwise.
gen no_dom_pc3 = (dom_share_pc3 == .)

* Use the pc3 code when the group contains at least one row unresolved at
* the pc4 level and the pc3 group mean is available.
replace pc = pc3 if dom_share_pc3 != . & replace_pc3 > 0
* Original author's note follows; its exact meaning is not shown here.
// 13, only synthetic

* Stage pc2: derive a 2-character code and fall back to it where needed.
* 8-character codes are truncated; all other codes are copied unchanged
* and then recoded below, grouped by the pc2 code they receive.
gen pc2 = cond(ll == 8, substr(pc8plus, 1, 2), pc8plus)

replace pc2 = "15" if pc8plus == "64.2007"
replace pc2 = "18" if inlist(pc8plus, "75.2005", "62.2005")
replace pc2 = "19" if pc8plus == "78.2005"
replace pc2 = "21" if inlist(pc8plus, "4.2004", "5.2004", "13.2007")
replace pc2 = "25" if inlist(pc8plus, "7.2004", "83.2005")
replace pc2 = "26" if pc8plus == "33.2007"
replace pc2 = "29" if inlist(pc8plus, "3.2007", "37.2007")
replace pc2 = "30" if pc8plus == "39.2007"
replace pc2 = "33" if inlist(pc8plus, "7.2007", "42.2007")
replace pc2 = "35" if inlist(pc8plus, "86.2005", "45.2007", "13.2004")

* Rows whose pc5 code is 14111 take the matching 2-character prefix.
replace pc2 = "14" if pc5 == "14111"

* Within each pc2 group: mean of dom_share, and the number of rows whose
* pc3-level mean was missing.
bysort pc2: egen dom_share_pc2 = mean(dom_share)
by pc2: egen replace_pc2 = sum(no_dom_pc3)

* no_dom_pc2 equals 1 when the group mean is missing and 0 otherwise.
gen no_dom_pc2 = (dom_share_pc2 == .)

* Use the pc2 code when the group contains at least one row unresolved at
* the pc3 level and the pc2 group mean is available.
replace pc = pc2 if dom_share_pc2 != . & replace_pc2 > 0

* Snapshot the working data so it can be reloaded after the filtering below.
save estimation\1_product_list\output\A4_data_germany_temp.dta, replace


* pc8plus_d flags rows whose pc8plus is not 8 characters long and whose
* final code pc differs from pc8plus; pc8plus_comp spreads that flag to
* every row sharing the same pc.
gen pc8plus_d = (ll != 8 & pc8plus != pc)
bysort pc: egen pc8plus_comp = max(pc8plus_d)

* Keep the non-8-character rows that belong to a flagged pc group and
* store the pc / pc8plus pairs.
keep if pc8plus_comp == 1 & ll != 8
keep pc pc8plus
order pc pc8plus
save estimation\1_product_list\output\A4_pc_comp_germany.dta, replace


* Reload the snapshot saved earlier and write the list of distinct pc codes
* that are NOT in a flagged group, then delete the snapshot.
* Fix: -use- takes the option -clear- to discard the data in memory; it has
* no -replace- option, so the previous call stopped the script with an error
* before A4_pc_germany.dta could be written.
use estimation\1_product_list\output\A4_data_germany_temp.dta, clear

* pc8plus_d flags rows whose pc8plus is not 8 characters long and whose
* final code pc differs from pc8plus; pc8plus_comp spreads that flag to
* every row sharing the same pc.
sort pc
gen pc8plus_d = 0
replace pc8plus_d = 1 if ll != 8 & pc8plus != pc
sort pc
by pc: egen pc8plus_comp = max(pc8plus_d)

* Drop every flagged pc group, then keep a single row per remaining pc.
drop if pc8plus_comp == 1
sort pc
by pc: keep if _n == 1
keep pc 
save estimation\1_product_list\output\A4_pc_germany.dta, replace

* The snapshot is only an intermediate file; remove it from disk.
rm estimation\1_product_list\output\A4_data_germany_temp.dta

